bitkeeper revision 1.1159.1.99 (412b0f11cFbfdCRdP2-GJYp0ANDfUA)

author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>

Tue, 24 Aug 2004 09:49:05 +0000 (09:49 +0000)

committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>

Tue, 24 Aug 2004 09:49:05 +0000 (09:49 +0000)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 24 Aug 2004 09:49:05 +0000 (09:49 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 24 Aug 2004 09:49:05 +0000 (09:49 +0000)
diff --cc linux-2.4.27-xen-sparse/arch/xen/kernel/ldt.c

index b0613a17b96641944f1a0fb016246f3a6e97a0f5,0000000000000000000000000000000000000000..9e14b0855b5975e295b8223495c1368ec9a8b0ff

mode 100644,000000..100644
--- 1/linux-2.4.27-xen-sparse/arch/xen/kernel/ldt.c
--- /dev/null
+++ b/linux-2.4.27-xen-sparse/arch/xen/kernel/ldt.c
@@@ -1,271 -1,0 +1,271 @@@
-               make_pages_writeable(
+ +/*
+ + * linux/kernel/ldt.c
+ + *
+ + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ + */
+ +
+ +#include <linux/errno.h>
+ +#include <linux/sched.h>
+ +#include <linux/string.h>
+ +#include <linux/mm.h>
+ +#include <linux/smp.h>
+ +#include <linux/smp_lock.h>
+ +#include <linux/vmalloc.h>
+ +#include <linux/slab.h>
+ +
+ +#include <asm/uaccess.h>
+ +#include <asm/system.h>
+ +#include <asm/ldt.h>
+ +#include <asm/desc.h>
+ +
+ +#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
+ +/*
+ + * Reload the LDT of the mm that is active on the executing CPU, if any.
+ + * Handed to smp_call_function() by alloc_ldt(); the 'mm' argument is
+ + * not used.
+ + */
+ +static void flush_ldt(void *mm)
+ +{
+ +      if (current->active_mm)
+ +              load_LDT(&current->active_mm->context);
+ +}
+ +#endif
+ +
+ +/*
+ + * Grow the LDT described by 'pc' so that it holds at least 'mincount'
+ + * entries (rounded up to a multiple of 512). Existing entries are copied
+ + * into the new table and the remaining entries are zeroed.
+ + *
+ + * If 'reload' is non-zero the new table is made read-only, loaded on this
+ + * CPU, and (on SMP, when the mm is in use on other CPUs) flush_ldt() is
+ + * run on the other CPUs.
+ + *
+ + * Returns 0 on success (including when no growth is needed), or -ENOMEM
+ + * if the new table cannot be allocated.
+ + */
+ +static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+ +{
+ +      void *oldldt;
+ +      void *newldt;
+ +      int oldsize;
+ +
+ +      if (mincount <= pc->size)
+ +              return 0;
+ +      oldsize = pc->size;
+ +      mincount = (mincount+511)&(~511);
+ +      /* Tables larger than a page come from vmalloc, others from kmalloc. */
+ +      if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+ +              newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+ +      else
+ +              newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+ +
+ +      if (!newldt)
+ +              return -ENOMEM;
+ +
+ +      if (oldsize)
+ +              memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+ +
+ +      oldldt = pc->ldt;
+ +      memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+ +      /* Make sure the table contents are visible before it is published. */
+ +      wmb();
+ +      pc->ldt = newldt;
+ +      pc->size = mincount;
+ +      if (reload) {
+ +              make_pages_readonly(
+ +                      pc->ldt,
+ +                      (pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE);
+ +              load_LDT(pc);
+ +              flush_page_update_queue();
+ +#ifdef CONFIG_SMP
+ +              if (current->mm->cpu_vm_mask != (1<<smp_processor_id()))
+ +                      smp_call_function(flush_ldt, 0, 1, 1);
+ +#endif
+ +      }
+ +      wmb();
+ +      if (oldsize) {
+ +              /*
+ +               * Every path that installs an LDT makes its pages read-only,
+ +               * so restore write access before handing the memory back to
+ +               * the allocator (destroy_context() does the same).
+ +               */
+ +              make_pages_writable(
+ +                      oldldt,
+ +                      (oldsize*LDT_ENTRY_SIZE)/PAGE_SIZE);
+ +              flush_page_update_queue();
+ +              if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+ +                      vfree(oldldt);
+ +              else
+ +                      kfree(oldldt);
+ +      }
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Give 'new' a private copy of the LDT in 'old': allocate a table of the
+ + * same size (without loading it), copy the entries and make the pages
+ + * read-only. On allocation failure a warning is logged, new->size is
+ + * reset to 0 and the error from alloc_ldt() is returned.
+ + */
+ +static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+ +{
+ +      int rc;
+ +
+ +      rc = alloc_ldt(new, old->size, 0);
+ +      if (rc >= 0) {
+ +              memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+ +              make_pages_readonly(
+ +                      new->ldt,
+ +                      (new->size*LDT_ENTRY_SIZE)/PAGE_SIZE);
+ +              return 0;
+ +      }
+ +
+ +      printk(KERN_WARNING "ldt allocation failed\n");
+ +      new->size = 0;
+ +      return rc;
+ +}
+ +
+ +/*
+ + * Set up the LDT state of a new mm: initialise its semaphore, start with
+ + * an empty LDT and, if the current task's mm has LDT entries, copy them
+ + * while holding that mm's context semaphore.
+ + *
+ + * We do not have to muck with descriptors here, that is done in
+ + * switch_mm() as needed.
+ + *
+ + * Returns 0, or the error reported by copy_ldt().
+ + */
+ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+ +{
+ +      struct mm_struct *parent_mm = current->mm;
+ +      int rc;
+ +
+ +      init_MUTEX(&mm->context.sem);
+ +      mm->context.size = 0;
+ +
+ +      if (!parent_mm || parent_mm->context.size <= 0)
+ +              return 0;
+ +
+ +      down(&parent_mm->context.sem);
+ +      rc = copy_ldt(&mm->context, &parent_mm->context);
+ +      up(&parent_mm->context.sem);
+ +      return rc;
+ +}
+ +
+ +/*
+ + * Free the LDT of 'mm', if it has one, and reset its size to 0.
+ + *
+ + * No need to lock the MM as we are the last user
+ + * Do not touch the ldt register, we are already
+ + * in the next thread.
+ + *
+ + * The table's pages are made writable again and the page update queue
+ + * is flushed before the memory is handed back; vfree() is used when the
+ + * table is larger than a page, kfree() otherwise (mirroring alloc_ldt()).
+ + */
+ +void destroy_context(struct mm_struct *mm)
+ +{
+ +      if (mm->context.size) {
++              make_pages_writable(
+ +                      mm->context.ldt, 
+ +                      (mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE);
+ +              flush_page_update_queue();
+ +              if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+ +                      vfree(mm->context.ldt);
+ +              else
+ +                      kfree(mm->context.ldt);
+ +              mm->context.size = 0;
+ +      }
+ +}
+ +
+ +/*
+ + * Copy the current mm's LDT to the user buffer 'ptr'.
+ + *
+ + * 'bytecount' is clamped to LDT_ENTRY_SIZE*LDT_ENTRIES. If the LDT is
+ + * shorter than the (clamped) request, the rest of the buffer is zeroed.
+ + *
+ + * Returns 0 if the mm has no LDT, the clamped byte count on success, or
+ + * -EFAULT if either the copy or the zero-fill faults.
+ + */
+ +static int read_ldt(void * ptr, unsigned long bytecount)
+ +{
+ +      int err;
+ +      unsigned long size;
+ +      struct mm_struct * mm = current->mm;
+ +
+ +      if (!mm->context.size)
+ +              return 0;
+ +      if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+ +              bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+ +
+ +      /* Hold the context semaphore while the table is being read. */
+ +      down(&mm->context.sem);
+ +      size = mm->context.size*LDT_ENTRY_SIZE;
+ +      if (size > bytecount)
+ +              size = bytecount;
+ +
+ +      err = 0;
+ +      if (copy_to_user(ptr, mm->context.ldt, size))
+ +              err = -EFAULT;
+ +      up(&mm->context.sem);
+ +      if (err < 0)
+ +              return err;
+ +      if (size != bytecount) {
+ +              /*
+ +               * zero-fill the rest; do not claim 'bytecount' bytes were
+ +               * returned if part of the buffer could not be written.
+ +               */
+ +              if (clear_user(ptr+size, bytecount-size))
+ +                      return -EFAULT;
+ +      }
+ +      return bytecount;
+ +}
+ +
+ +/*
+ + * Copy the start of default_ldt (at most 5 descriptors, and never more
+ + * than 'bytecount' bytes) to the user buffer 'ptr'.
+ + *
+ + * Returns the number of bytes copied, or -EFAULT if the copy faults.
+ + */
+ +static int read_default_ldt(void * ptr, unsigned long bytecount)
+ +{
+ +      unsigned long nbytes = 5*sizeof(struct desc_struct);
+ +
+ +      if (nbytes > bytecount)
+ +              nbytes = bytecount;
+ +
+ +      if (copy_to_user(ptr, &default_ldt[0], nbytes))
+ +              return -EFAULT;
+ +
+ +      return nbytes;
+ +}
+ +
+ +/*
+ + * Install one LDT entry described by the user-supplied
+ + * struct modify_ldt_ldt_s at 'ptr'.
+ + *
+ + * 'bytecount' must equal sizeof(struct modify_ldt_ldt_s). 'oldmode' is
+ + * 1 for modify_ldt function 1 and 0 for function 0x11 (see
+ + * sys_modify_ldt()); in oldmode the 'useable' bit is not encoded and
+ + * contents == 3 is rejected.
+ + *
+ + * Returns -EINVAL for a bad size, entry number or contents value,
+ + * -EFAULT if the request cannot be read, the alloc_ldt() error if the
+ + * table cannot be grown, and otherwise the result of
+ + * HYPERVISOR_update_descriptor().
+ + */
+ +static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
+ +{
+ +      struct mm_struct * mm = current->mm;
+ +      __u32 entry_1, entry_2, *lp;
+ +      unsigned long phys_lp;
+ +      int error;
+ +      struct modify_ldt_ldt_s ldt_info;
+ +
+ +      error = -EINVAL;
+ +      if (bytecount != sizeof(ldt_info))
+ +              goto out;
+ +      error = -EFAULT;        
+ +      if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+ +              goto out;
+ +
+ +      error = -EINVAL;
+ +      if (ldt_info.entry_number >= LDT_ENTRIES)
+ +              goto out;
+ +      /* contents == 3 is only accepted in new mode with seg_not_present set. */
+ +      if (ldt_info.contents == 3) {
+ +              if (oldmode)
+ +                      goto out;
+ +              if (ldt_info.seg_not_present == 0)
+ +                      goto out;
+ +      }
+ +
+ +      down(&mm->context.sem);
+ +      /* Grow (and reload) the table if the entry lies beyond its end. */
+ +      if (ldt_info.entry_number >= mm->context.size) {
+ +              error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+ +              if (error < 0)
+ +                      goto out_unlock;
+ +      }
+ +
+ +      /* Address of the entry: entry_number * 8 bytes into the table. */
+ +      lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+ +      phys_lp = arbitrary_virt_to_phys(lp);
+ +
+ +      /* Allow LDTs to be cleared by the user. */
+ +      if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ +              if (oldmode ||
+ +                  (ldt_info.contents == 0             &&
+ +                   ldt_info.read_exec_only == 1       &&
+ +                   ldt_info.seg_32bit == 0            &&
+ +                   ldt_info.limit_in_pages == 0       &&
+ +                   ldt_info.seg_not_present == 1      &&
+ +                   ldt_info.useable == 0 )) {
+ +                      entry_1 = 0;
+ +                      entry_2 = 0;
+ +                      goto install;
+ +              }
+ +      }
+ +
+ +      /* Pack base, limit and attribute bits into the two descriptor words. */
+ +      entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+ +                (ldt_info.limit & 0x0ffff);
+ +      entry_2 = (ldt_info.base_addr & 0xff000000) |
+ +                ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+ +                (ldt_info.limit & 0xf0000) |
+ +                ((ldt_info.read_exec_only ^ 1) << 9) |
+ +                (ldt_info.contents << 10) |
+ +                ((ldt_info.seg_not_present ^ 1) << 15) |
+ +                (ldt_info.seg_32bit << 22) |
+ +                (ldt_info.limit_in_pages << 23) |
+ +                0x7000;
+ +      if (!oldmode)
+ +              entry_2 |= (ldt_info.useable << 20);
+ +
+ +      /*
+ +       * Install the new entry ... via the hypervisor, using the entry's
+ +       * physical address, rather than by storing to the table directly.
+ +       */
+ +install:
+ +      error = HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2);
+ +
+ +out_unlock:
+ +      up(&mm->context.sem);
+ +out:
+ +      return error;
+ +}
+ +
+ +/*
+ + * modify_ldt system call entry point. 'func' selects the operation:
+ + *   0    - read the current LDT
+ + *   1    - write an entry (old mode)
+ + *   2    - read the default LDT
+ + *   0x11 - write an entry (new mode)
+ + * Any other value yields -ENOSYS.
+ + */
+ +asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+ +{
+ +      if (func == 0)
+ +              return read_ldt(ptr, bytecount);
+ +      if (func == 1)
+ +              return write_ldt(ptr, bytecount, 1);
+ +      if (func == 2)
+ +              return read_default_ldt(ptr, bytecount);
+ +      if (func == 0x11)
+ +              return write_ldt(ptr, bytecount, 0);
+ +
+ +      return -ENOSYS;
+ +}
diff --cc linux-2.4.27-xen-sparse/arch/xen/kernel/traps.c

index aea85ffca6a208c748220e0c19bb6c627db89876,0000000000000000000000000000000000000000..803d8f72b417eb9b3e3bed4616de9289a408a05e

mode 100644,000000..100644
--- 1/linux-2.4.27-xen-sparse/arch/xen/kernel/traps.c
--- /dev/null
+++ b/linux-2.4.27-xen-sparse/arch/xen/kernel/traps.c
@@@ -1,691 -1,0 +1,691 @@@
-  * NB. Perhaps this can all go away after we have implemented writeable
+ +/*
+ + *  linux/arch/i386/traps.c
+ + *
+ + *  Copyright (C) 1991, 1992  Linus Torvalds
+ + *
+ + *  Pentium III FXSR, SSE support
+ + *    Gareth Hughes <gareth@valinux.com>, May 2000
+ + */
+ +
+ +/*
+ + * 'Traps.c' handles hardware traps and faults after we have saved some
+ + * state in 'asm.s'.
+ + */
+ +#include <linux/config.h>
+ +#include <linux/sched.h>
+ +#include <linux/kernel.h>
+ +#include <linux/string.h>
+ +#include <linux/errno.h>
+ +#include <linux/ptrace.h>
+ +#include <linux/timer.h>
+ +#include <linux/mm.h>
+ +#include <linux/init.h>
+ +#include <linux/delay.h>
+ +#include <linux/spinlock.h>
+ +#include <linux/interrupt.h>
+ +#include <linux/highmem.h>
+ +
+ +#include <asm/system.h>
+ +#include <asm/uaccess.h>
+ +#include <asm/io.h>
+ +#include <asm/atomic.h>
+ +#include <asm/debugreg.h>
+ +#include <asm/desc.h>
+ +#include <asm/i387.h>
+ +
+ +#include <asm/smp.h>
+ +#include <asm/pgalloc.h>
+ +
+ +#include <asm/hypervisor.h>
+ +
+ +#include <linux/irq.h>
+ +#include <linux/module.h>
+ +
+ +asmlinkage int system_call(void);
+ +asmlinkage void lcall7(void);
+ +asmlinkage void lcall27(void);
+ +
+ +asmlinkage void divide_error(void);
+ +asmlinkage void debug(void);
+ +asmlinkage void int3(void);
+ +asmlinkage void overflow(void);
+ +asmlinkage void bounds(void);
+ +asmlinkage void invalid_op(void);
+ +asmlinkage void device_not_available(void);
+ +asmlinkage void double_fault(void);
+ +asmlinkage void coprocessor_segment_overrun(void);
+ +asmlinkage void invalid_TSS(void);
+ +asmlinkage void segment_not_present(void);
+ +asmlinkage void stack_segment(void);
+ +asmlinkage void general_protection(void);
+ +asmlinkage void page_fault(void);
+ +asmlinkage void safe_page_fault(void);
+ +asmlinkage void coprocessor_error(void);
+ +asmlinkage void simd_coprocessor_error(void);
+ +asmlinkage void alignment_check(void);
+ +asmlinkage void fixup_4gb_segment(void);
+ +asmlinkage void machine_check(void);
+ +
+ +int kstack_depth_to_print = 24;
+ +
+ +
+ +/*
+ + * If the address is either in the .text section of the
+ + * kernel, or in the vmalloc'ed module regions, it *may* 
+ + * be the address of a calling routine
+ + */
+ +
+ +#ifdef CONFIG_MODULES
+ +
+ +extern struct module *module_list;
+ +extern struct module kernel_module;
+ +
+ +/*
+ + * Return 1 if 'addr' lies within [_stext, _etext] or inside any module
+ + * on module_list, 0 otherwise.
+ + */
+ +static inline int kernel_text_address(unsigned long addr)
+ +{
+ +      struct module *m;
+ +
+ +      if (addr >= (unsigned long) &_stext &&
+ +          addr <= (unsigned long) &_etext)
+ +              return 1;
+ +
+ +      /* mod_bound tests for addr being inside the vmalloc'ed
+ +       * module area. Of course it'd be better to test only
+ +       * for the .text subset... */
+ +      for (m = module_list; m != &kernel_module; m = m->next)
+ +              if (mod_bound(addr, 0, m))
+ +                      return 1;
+ +
+ +      return 0;
+ +}
+ +
+ +#else
+ +
+ +/* Return non-zero if 'addr' lies within [_stext, _etext]. */
+ +static inline int kernel_text_address(unsigned long addr)
+ +{
+ +      if (addr < (unsigned long) &_stext)
+ +              return 0;
+ +      return (addr <= (unsigned long) &_etext);
+ +}
+ +
+ +#endif
+ +
+ +/*
+ + * Print a call trace: walk the stack words from 'stack' up to the next
+ + * THREAD_SIZE-aligned address and print every word accepted by
+ + * kernel_text_address(). A NULL 'stack' means "start at the current
+ + * stack position".
+ + */
+ +void show_trace(unsigned long * stack)
+ +{
+ +      unsigned long word;
+ +      int printed = 1;
+ +
+ +      if (stack == NULL)
+ +              stack = (unsigned long*)&stack;
+ +
+ +      printk("Call Trace: ");
+ +      for (; ((long) stack & (THREAD_SIZE-1)) != 0; stack++) {
+ +              word = *stack;
+ +              if (!kernel_text_address(word))
+ +                      continue;
+ +              /* Start a new output line after every sixth count. */
+ +              if (printed && ((printed % 6) == 0))
+ +                      printk("\n   ");
+ +              printk("[<%08lx>] ", word);
+ +              printed++;
+ +      }
+ +      printk("\n");
+ +}
+ +
+ +/*
+ + * Print a call trace for 'tsk' starting at its saved stack pointer,
+ + * but only if that pointer and the task structure agree in the bits
+ + * selected by (PAGE_MASK<<1).
+ + */
+ +void show_trace_task(struct task_struct *tsk)
+ +{
+ +      unsigned long sp = tsk->thread.esp;
+ +      unsigned long mismatch = (sp ^ (unsigned long)tsk) & (PAGE_MASK<<1);
+ +
+ +      /* User space on another CPU? */
+ +      if (mismatch == 0)
+ +              show_trace((unsigned long *)sp);
+ +}
+ +
+ +/*
+ + * Dump up to kstack_depth_to_print stack words starting at 'esp' (eight
+ + * per line), stopping early at a THREAD_SIZE-aligned address, then print
+ + * the call trace from the same starting point.
+ + */
+ +void show_stack(unsigned long * esp)
+ +{
+ +      unsigned long *p;
+ +      int n;
+ +
+ +      /*
+ +       * debugging aid: "show_stack(NULL);" prints the
+ +       * back trace for this cpu.
+ +       */
+ +      if (!esp)
+ +              esp = (unsigned long*)&esp;
+ +
+ +      p = esp;
+ +      n = 0;
+ +      while (n < kstack_depth_to_print &&
+ +             ((long) p & (THREAD_SIZE-1)) != 0) {
+ +              if (n && ((n % 8) == 0))
+ +                      printk("\n       ");
+ +              printk("%08lx ", *p++);
+ +              n++;
+ +      }
+ +      printk("\n");
+ +      show_trace(esp);
+ +}
+ +
+ +/*
+ + * Print the register state in 'regs' at KERN_ALERT level, together with
+ + * the current CPU and process. If bit 1 of the saved CS is clear the
+ + * trap is treated as in-kernel and the stack is dumped as well.
+ + */
+ +void show_registers(struct pt_regs *regs)
+ +{
+ +      int in_kernel = 1;
+ +      unsigned long esp;
+ +      unsigned short ss;
+ +
+ +      /*
+ +       * Default to the in-kernel view: use the address of the esp slot
+ +       * in 'regs' and the kernel data segment. These are replaced by the
+ +       * saved esp/ss when the saved CS has bit 1 set.
+ +       */
+ +      esp = (unsigned long) (&regs->esp);
+ +      ss = __KERNEL_DS;
+ +      if (regs->xcs & 2) {
+ +              in_kernel = 0;
+ +              esp = regs->esp;
+ +              ss = regs->xss & 0xffff;
+ +      }
+ +      printk(KERN_ALERT "CPU:    %d\n", smp_processor_id() );
+ +      printk(KERN_ALERT "EIP:    %04x:[<%08lx>]    %s\n",
+ +             0xffff & regs->xcs, regs->eip, print_tainted());
+ +      printk(KERN_ALERT "EFLAGS: %08lx\n",regs->eflags);
+ +      printk(KERN_ALERT "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+ +              regs->eax, regs->ebx, regs->ecx, regs->edx);
+ +      printk(KERN_ALERT "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+ +              regs->esi, regs->edi, regs->ebp, esp);
+ +      printk(KERN_ALERT "ds: %04x   es: %04x   ss: %04x\n",
+ +              regs->xds & 0xffff, regs->xes & 0xffff, ss);
+ +      printk(KERN_ALERT "Process %s (pid: %d, stackpage=%08lx)",
+ +              current->comm, current->pid, 4096+(unsigned long)current);
+ +      /*
+ +       * When in-kernel, we also print out the stack and code at the
+ +       * time of the fault..
+ +       * (The code dump is currently compiled out with "#if 0".)
+ +       */
+ +      if (in_kernel) {
+ +
+ +              printk(KERN_ALERT "\nStack: ");
+ +              show_stack((unsigned long*)esp);
+ +
+ +#if 0
+ +                {
+ +                        int i;
+ +                      printk(KERN_ALERT "\nCode: ");
+ +                      if(regs->eip < PAGE_OFFSET)
+ +                              goto bad;
+ +
+ +                      for(i=0;i<20;i++)
+ +                      {
+ +                              unsigned char c;
+ +                              if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
+ +bad:
+ +                                      printk(KERN_ALERT " Bad EIP value.");
+ +                                      break;
+ +                              }
+ +                              printk("%02x ", c);
+ +                      }
+ +              }
+ +#endif
+ +      }
+ +      printk(KERN_ALERT "\n");
+ +}     
+ +
+ +/* Serialises the output produced by die(). */
+ +spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+ +
+ +/*
+ + * Report a fatal trap: with die_lock held, print 'str' followed by the
+ + * low 16 bits of 'err' and the register dump for 'regs', then end the
+ + * current task with do_exit(SIGSEGV).
+ + */
+ +void die(const char * str, struct pt_regs * regs, long err)
+ +{
+ +      console_verbose();
+ +      spin_lock_irq(&die_lock);
+ +      bust_spinlocks(1);
+ +      printk("%s: %04lx\n", str, err & 0xffff);
+ +      show_registers(regs);
+ +      bust_spinlocks(0);
+ +      spin_unlock_irq(&die_lock);
+ +      do_exit(SIGSEGV);
+ +}
+ +
+ +/* Call die() only when bit 1 of the saved CS is clear (in-kernel trap). */
+ +static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+ +{
+ +      if (regs->xcs & 2)
+ +              return;
+ +      die(str, regs, err);
+ +}
+ +
+ +
+ +/*
+ + * Common trap handling.
+ + *
+ + * If bit 1 of the saved CS is set, record the trap number and error code
+ + * in the current task and force signal 'signr' on it (with 'info' if one
+ + * is supplied). Otherwise look up an exception-table fixup for the
+ + * faulting EIP and resume there, or die() with 'str' if there is none.
+ + */
+ +static void inline do_trap(int trapnr, int signr, char *str,
+ +                         struct pt_regs * regs, long error_code,
+ +                         siginfo_t *info)
+ +{
+ +      if (regs->xcs & 2) {
+ +              struct task_struct *task = current;
+ +
+ +              task->thread.error_code = error_code;
+ +              task->thread.trap_no = trapnr;
+ +              if (info != NULL)
+ +                      force_sig_info(signr, info, task);
+ +              else
+ +                      force_sig(signr, task);
+ +      } else {
+ +              unsigned long fixup = search_exception_table(regs->eip);
+ +
+ +              if (!fixup)
+ +                      die(str, regs, error_code);
+ +              else
+ +                      regs->eip = fixup;
+ +      }
+ +}
+ +
+ +/*
+ + * DO_ERROR defines the handler do_<name>(), which forwards to do_trap()
+ + * with trap number 'trapnr', signal 'signr' and message 'str', and no
+ + * siginfo.
+ + */
+ +#define DO_ERROR(trapnr, signr, str, name) \
+ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+ +{ \
+ +      do_trap(trapnr, signr, str, regs, error_code, NULL); \
+ +}
+ +
+ +/*
+ + * DO_ERROR_INFO is like DO_ERROR but also passes a siginfo_t carrying
+ + * si_code 'sicode' and si_addr 'siaddr'. 'siaddr' is evaluated inside
+ + * the generated handler, so it may refer to 'regs'.
+ + */
+ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+ +{ \
+ +      siginfo_t info; \
+ +      info.si_signo = signr; \
+ +      info.si_errno = 0; \
+ +      info.si_code = sicode; \
+ +      info.si_addr = (void *)siaddr; \
+ +      do_trap(trapnr, signr, str, regs, error_code, &info); \
+ +}
+ +
+ +/* Handlers generated for the simple traps. */
+ +DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+ +DO_ERROR( 3, SIGTRAP, "int3", int3)
+ +DO_ERROR( 4, SIGSEGV, "overflow", overflow)
+ +DO_ERROR( 5, SIGSEGV, "bounds", bounds)
+ +DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+ +DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
+ +DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
+ +DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+ +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+ +DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+ +DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+ +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+ +DO_ERROR(18, SIGBUS, "machine check", machine_check)
+ +
+ +/*
+ + * General protection fault handler (trap 13).
+ + *
+ + * A fault on an LDT access while no LDT is loaded installs default_ldt
+ + * and returns. Otherwise a fault with bit 1 of the saved CS set raises
+ + * SIGSEGV on the current task; any other fault is resolved through the
+ + * exception table or ends in die().
+ + */
+ +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+ +{
+ +      /*
+ +       * If we trapped on an LDT access then ensure that the default_ldt is
+ +       * loaded, if nothing else. We load default_ldt lazily because LDT
+ +       * switching costs time and many applications don't need it.
+ +       */
+ +      if ( unlikely((error_code & 6) == 4) )
+ +      {
+ +              unsigned long ldt;
+ +              /* Read the current LDT selector; 0 means no LDT is loaded. */
+ +              __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
+ +              if ( ldt == 0 )
+ +              {
+ +                  /* Ask the hypervisor to set a 5-entry LDT at default_ldt. */
+ +                  mmu_update_t u;
+ +                  u.ptr  = MMU_EXTENDED_COMMAND;
+ +                  u.ptr |= (unsigned long)&default_ldt[0];
+ +                  u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
+ +                  if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0) )
+ +                  {
+ +                      show_trace(NULL);
+ +                      panic("Failed to install default LDT");
+ +                  }
+ +                  /* NOTE(review): presumably the faulting access is then
+ +                   * retried with the LDT in place -- confirm. */
+ +                  return;
+ +              }
+ +      }
+ +
+ +      if (!(regs->xcs & 2))
+ +              goto gp_in_kernel;
+ +
+ +      current->thread.error_code = error_code;
+ +      current->thread.trap_no = 13;
+ +      force_sig(SIGSEGV, current);
+ +      return;
+ +
+ +gp_in_kernel:
+ +      {
+ +              unsigned long fixup;
+ +              fixup = search_exception_table(regs->eip);
+ +              if (fixup) {
+ +                      regs->eip = fixup;
+ +                      return;
+ +              }
+ +              die("general protection fault", regs, error_code);
+ +      }
+ +}
+ +
+ +
+ +/*
+ + * Debug trap handler (trap 1).
+ + *
+ + * Reads debug register 6 through the hypervisor, filters out spurious
+ + * breakpoint and single-step traps, and otherwise delivers SIGTRAP
+ + * (si_code TRAP_BRKPT) to the current task. On the signalling path and
+ + * on the spurious-breakpoint path debug register 7 is cleared; on the
+ + * spurious single-step path only TF is cleared in the saved EFLAGS.
+ + */
+ +asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+ +{
+ +    unsigned int condition;
+ +    struct task_struct *tsk = current;
+ +    siginfo_t info;
+ +
+ +    condition = HYPERVISOR_get_debugreg(6);
+ +
+ +    /* Mask out spurious debug traps due to lazy DR7 setting */
+ +    if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+ +        if (!tsk->thread.debugreg[7])
+ +            goto clear_dr7;
+ +    }
+ +
+ +    /* Save debug status register where ptrace can see it */
+ +    tsk->thread.debugreg[6] = condition;
+ +
+ +    /* Mask out spurious TF errors due to lazy TF clearing */
+ +    if (condition & DR_STEP) {
+ +        /*
+ +         * The TF error should be masked out only if the current
+ +         * process is not traced and if the TRAP flag has been set
+ +         * previously by a tracing process (condition detected by
+ +         * the PT_DTRACE flag); remember that the i386 TRAP flag
+ +         * can be modified by the process itself in user mode,
+ +         * allowing programs to debug themselves without the ptrace()
+ +         * interface.
+ +         */
+ +        if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
+ +            goto clear_TF;
+ +    }
+ +
+ +    /* Ok, finally something we can handle */
+ +    tsk->thread.trap_no = 1;
+ +    tsk->thread.error_code = error_code;
+ +    info.si_signo = SIGTRAP;
+ +    info.si_errno = 0;
+ +    info.si_code = TRAP_BRKPT;
+ +        
+ +    /* If this is a kernel mode trap, save the user PC on entry to 
+ +     * the kernel, that's what the debugger can make sense of.
+ +     */
+ +    info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : 
+ +                                            (void *)regs->eip;
+ +    force_sig_info(SIGTRAP, &info, tsk);
+ +
+ +    /* Disable additional traps. They'll be re-enabled when
+ +     * the signal is delivered.
+ +     */
+ + clear_dr7:
+ +    HYPERVISOR_set_debugreg(7, 0);
+ +    return;
+ +
+ + clear_TF:
+ +    regs->eflags &= ~TF_MASK;
+ +    return;
+ +}
+ +
+ +
+ +/*
+ + * Note that we play around with the 'TS' bit in an attempt to get
+ + * the correct behaviour even in the presence of the asynchronous
+ + * IRQ13 behaviour
+ + */
+ +/*
+ + * Deliver SIGFPE to the current task for an x87 FPU error at 'eip'.
+ + * The FPU state is saved first, trap number 16 is recorded, and si_code
+ + * is derived from the saved control and status words (it stays
+ + * __SI_FAULT when no single exception can be identified).
+ + */
+ +void math_error(void *eip)
+ +{
+ +      struct task_struct * task;
+ +      siginfo_t info;
+ +      unsigned short cwd, swd;
+ +
+ +      /*
+ +       * Save the info for the exception handler and clear the error.
+ +       */
+ +      task = current;
+ +      save_init_fpu(task);
+ +      task->thread.trap_no = 16;
+ +      task->thread.error_code = 0;
+ +      info.si_signo = SIGFPE;
+ +      info.si_errno = 0;
+ +      info.si_code = __SI_FAULT;
+ +      info.si_addr = eip;
+ +      /*
+ +       * (~cwd & swd) will mask out exceptions that are not set to unmasked
+ +       * status.  0x3f is the exception bits in these regs, 0x200 is the
+ +       * C1 reg you need in case of a stack fault, 0x040 is the stack
+ +       * fault bit.  We should only be taking one exception at a time,
+ +       * so if this combination doesn't produce any single exception,
+ +       * then we have a bad program that isn't synchronizing its FPU usage
+ +       * and it will suffer the consequences since we won't be able to
+ +       * fully reproduce the context of the exception
+ +       */
+ +      cwd = get_fpu_cwd(task);
+ +      swd = get_fpu_swd(task);
+ +      switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+ +              case 0x000:
+ +              default:
+ +                      break;
+ +              case 0x001: /* Invalid Op */
+ +              case 0x041: /* Stack Fault */
+ +              case 0x241: /* Stack Fault | Direction */
+ +                      info.si_code = FPE_FLTINV;
+ +                      break;
+ +              case 0x002: /* Denormalize */
+ +              case 0x010: /* Underflow */
+ +                      info.si_code = FPE_FLTUND;
+ +                      break;
+ +              case 0x004: /* Zero Divide */
+ +                      info.si_code = FPE_FLTDIV;
+ +                      break;
+ +              case 0x008: /* Overflow */
+ +                      info.si_code = FPE_FLTOVF;
+ +                      break;
+ +              case 0x020: /* Precision */
+ +                      info.si_code = FPE_FLTRES;
+ +                      break;
+ +      }
+ +      force_sig_info(SIGFPE, &info, task);
+ +}
+ +
+ +/*
+ + * Coprocessor error trap handler: sets ignore_irq13 and reports the FPU
+ + * error at the faulting EIP through math_error(). 'error_code' is unused.
+ + */
+ +asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+ +{
+ +      ignore_irq13 = 1;
+ +      math_error((void *)regs->eip);
+ +}
+ +
+ +/*
+ + * Deliver SIGFPE to the current task for a SIMD floating-point error at
+ + * 'eip'. The FPU state is saved first, trap number 19 is recorded, and
+ + * si_code is derived from the saved MXCSR value (it stays __SI_FAULT
+ + * when no single exception can be identified).
+ + */
+ +void simd_math_error(void *eip)
+ +{
+ +      struct task_struct * task;
+ +      siginfo_t info;
+ +      unsigned short mxcsr;
+ +
+ +      /*
+ +       * Save the info for the exception handler and clear the error.
+ +       */
+ +      task = current;
+ +      save_init_fpu(task);
+ +      task->thread.trap_no = 19;
+ +      task->thread.error_code = 0;
+ +      info.si_signo = SIGFPE;
+ +      info.si_errno = 0;
+ +      info.si_code = __SI_FAULT;
+ +      info.si_addr = eip;
+ +      /*
+ +       * The SIMD FPU exceptions are handled a little differently, as there
+ +       * is only a single status/control register.  Thus, to determine which
+ +       * unmasked exception was caught we must mask the exception mask bits
+ +       * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+ +       */
+ +      mxcsr = get_fpu_mxcsr(task);
+ +      switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+ +              case 0x000:
+ +              default:
+ +                      break;
+ +              case 0x001: /* Invalid Op */
+ +                      info.si_code = FPE_FLTINV;
+ +                      break;
+ +              case 0x002: /* Denormalize */
+ +              case 0x010: /* Underflow */
+ +                      info.si_code = FPE_FLTUND;
+ +                      break;
+ +              case 0x004: /* Zero Divide */
+ +                      info.si_code = FPE_FLTDIV;
+ +                      break;
+ +              case 0x008: /* Overflow */
+ +                      info.si_code = FPE_FLTOVF;
+ +                      break;
+ +              case 0x020: /* Precision */
+ +                      info.si_code = FPE_FLTRES;
+ +                      break;
+ +      }
+ +      force_sig_info(SIGFPE, &info, task);
+ +}
+ +
+ +/*
+ + * Trap 19 handler. With cpu_has_xmm the trap is reported as a SIMD FPU
+ + * exception; otherwise it is reported as "cache flush denied": die in
+ + * kernel mode, SIGSEGV for the current task otherwise.
+ + */
+ +asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
+ +                                        long error_code)
+ +{
+ +      if (!cpu_has_xmm) {
+ +              die_if_kernel("cache flush denied", regs, error_code);
+ +              current->thread.trap_no = 19;
+ +              current->thread.error_code = error_code;
+ +              force_sig(SIGSEGV, current);
+ +              return;
+ +      }
+ +
+ +      /* Handle SIMD FPU exceptions on PIII+ processors. */
+ +      ignore_irq13 = 1;
+ +      simd_math_error((void *)regs->eip);
+ +}
+ +
+ +/*
+ + *  'math_state_restore()' saves the current math information in the
+ + * old math state array, and gets the new ones from the current task
+ + *
+ + * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ + * Don't touch unless you *really* know how it works.
+ + */
+ +asmlinkage void math_state_restore(struct pt_regs regs)
+ +{
+ +      /*
+ +       * A trap in kernel mode can be ignored. It'll be the fast XOR or
+ +       * copying libraries, which will correctly save/restore state and
+ +       * reset the TS bit in CR0.
+ +       */
+ +      if ( !(regs.xcs & 2) )
+ +              return;
+ +
+ +      /* First FPU use by this task initialises the FPU; later uses
+ +       * restore the task's saved state. */
+ +      if (!current->used_math)
+ +              init_fpu();
+ +      else
+ +              restore_fpu(current);
+ +
+ +      current->flags |= PF_USEDFPU;   /* So we fnsave on switch_to() */
+ +}
+ +
+ +
+ +/*
+ + * Write an 8-byte gate descriptor at 'gate_addr'.
+ + *
+ + * The first 32-bit word is (__KERNEL_CS << 16) | low 16 bits of 'addr';
+ + * the second is the high 16 bits of 'addr' combined with the 16-bit
+ + * attribute value 0x8000+(dpl<<13)+(type<<8).
+ + */
+ +#define _set_gate(gate_addr,type,dpl,addr) \
+ +do { \
+ +  int __d0, __d1; \
+ +  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+ +      "movw %4,%%dx\n\t" \
+ +      "movl %%eax,%0\n\t" \
+ +      "movl %%edx,%1" \
+ +      :"=m" (*((long *) (gate_addr))), \
+ +       "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+ +      :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+ +       "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \
+ +} while (0)
+ +
+ +/* Write a gate of type 12 with DPL 3 at 'a' that targets 'addr'. */
+ +static void __init set_call_gate(void *a, void *addr)
+ +{
+ +      _set_gate(a,12,3,addr);
+ +}
+ +
+ +
+ +/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */
+ +/*
+ + * Table handed to HYPERVISOR_set_trap_table() by trap_init().
+ + * NOTE(review): each entry presumably reads {vector, privilege level,
+ + * code selector, handler address}, with the all-zero entry acting as the
+ + * terminator -- confirm against the trap_info_t definition.
+ + */
+ +static trap_info_t trap_table[] = {
+ +    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
+ +    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
+ +    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
+ +    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
+ +    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
+ +    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
+ +    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
+ +    {  8, 0, __KERNEL_CS, (unsigned long)double_fault                },
+ +    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+ +    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
+ +    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
+ +    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
+ +    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
+ +    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
+ +    { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment           },
+ +    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
+ +    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
+ +    { 18, 0, __KERNEL_CS, (unsigned long)machine_check               },
+ +    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
+ +    { SYSCALL_VECTOR, 
+ +          3, __KERNEL_CS, (unsigned long)system_call                 },
+ +    {  0, 0,           0, 0                           }
+ +};
+ +
+ +
+ +/*
+ + * Boot-time trap setup: register trap_table and the system-call fast
+ + * trap with the hypervisor, build default_ldt, then run cpu_init().
+ + */
+ +void __init trap_init(void)
+ +{
+ +    HYPERVISOR_set_trap_table(trap_table);    
+ +    HYPERVISOR_set_fast_trap(SYSCALL_VECTOR);
+ +
+ +    /*
+ +     * The default LDT is a single-entry callgate to lcall7 for iBCS and a
+ +     * callgate to lcall27 for Solaris/x86 binaries.
+ +     * The page is cleared first and made read-only once both gates
+ +     * (entries 0 and 4) have been written.
+ +     */
+ +    clear_page(&default_ldt[0]);
+ +    set_call_gate(&default_ldt[0],lcall7);
+ +    set_call_gate(&default_ldt[4],lcall27);
+ +    __make_page_readonly(&default_ldt[0]);
+ +
+ +    cpu_init();
+ +}
+ +
+ +
+ +/*
+ + * install_safe_pf_handler / install_normal_pf_handler:
+ + * 
+ + * These are used within the failsafe_callback handler in entry.S to avoid
+ + * taking a full page fault when reloading FS and GS. This is because FS and 
+ + * GS could be invalid at pretty much any point while Xenolinux executes (we 
+ + * don't set them to safe values on entry to the kernel). At *any* point Xen 
+ + * may be entered due to a hardware interrupt --- on exit from Xen an invalid 
+ + * FS/GS will cause our failsafe_callback to be executed. This could occur, 
+ + * for example, while the mmu_update_queue is in an inconsistent state. This
+ + * is disastrous because the normal page-fault handler touches the update
+ + * queue!
+ + * 
+ + * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
+ + * to zero if they cannot be reloaded -- at this point executing a normal
+ + * page fault would not change this effect. The safe page-fault handler
+ + * ensures this end result (blow away the selector value) without the dangers
+ + * of the normal page-fault handler.
+ + * 
++ * NB. Perhaps this can all go away after we have implemented writable
+ + * page tables. :-)
+ + */
+ +
+ +/*
+ + * "Safe" page-fault handler: resume at the exception-table fixup for the
+ + * faulting EIP if there is one, otherwise die(). 'address' is unused.
+ + */
+ +asmlinkage void do_safe_page_fault(struct pt_regs *regs,
+ +                                   unsigned long error_code,
+ +                                   unsigned long address)
+ +{
+ +    unsigned long fixup = search_exception_table(regs->eip);
+ +
+ +    if ( fixup == 0 )
+ +        die("Unhandleable 'safe' page fault!", regs, error_code);
+ +    else
+ +        regs->eip = fixup;
+ +}
+ +
+ +/*
+ + * Save the interrupt flags with local_irq_save(), then point vector 14
+ + * at safe_page_fault. Returns the saved flags, which the caller later
+ + * passes to install_normal_pf_handler().
+ + */
+ +unsigned long install_safe_pf_handler(void)
+ +{
+ +    static trap_info_t safe_pf[] = { 
+ +        { 14, 0, __KERNEL_CS, (unsigned long)safe_page_fault },
+ +        {  0, 0,           0, 0                              }
+ +    };
+ +    unsigned long flags;
+ +    local_irq_save(flags);
+ +    HYPERVISOR_set_trap_table(safe_pf);
+ +    return flags; /* This is returned in %%eax */
+ +}
+ +
+ +/*
+ + * Point vector 14 back at page_fault, then restore the interrupt flags
+ + * given in 'flags' with local_irq_restore().
+ + */
+ +__attribute__((regparm(3))) /* This function takes its arg in %%eax */
+ +void install_normal_pf_handler(unsigned long flags)
+ +{
+ +    static trap_info_t normal_pf[] = { 
+ +        { 14, 0, __KERNEL_CS, (unsigned long)page_fault },
+ +        {  0, 0,           0, 0                         }
+ +    };
+ +    HYPERVISOR_set_trap_table(normal_pf);
+ +    local_irq_restore(flags);
+ +}
diff --cc linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h

index a0d9d506efaf47a3f5d3f916fdf60594dc60e6fa,0000000000000000000000000000000000000000..f6bee4d6895595670f9f8989430147f295b40648

mode 100644,000000..100644
--- 1/linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h
--- /dev/null
+++ b/linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h
@@@ -1,285 -1,0 +1,285 @@@
-         __make_page_writeable(pgd);
+ +#ifndef _I386_PGALLOC_H
+ +#define _I386_PGALLOC_H
+ +
+ +#include <linux/config.h>
+ +#include <asm/processor.h>
+ +#include <asm/fixmap.h>
+ +#include <asm/hypervisor.h>
+ +#include <linux/threads.h>
+ +
+ +/*
+ + * Quick lists are aligned so that least significant bits of array pointer
+ + * are all zero when list is empty, and all one when list is full.
+ + */
+ +#define QUICKLIST_ENTRIES 256
+ +#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1)) /* true when the low bits of the pointer value selected by the mask are all zero. NOTE(review): the expansion has no outer parentheses, and the 4 is presumably the size of one entry in bytes -- confirm */
+ +#define QUICKLIST_FULL(_l)  QUICKLIST_EMPTY((_l)+1) /* applies the empty test to (_l)+1 */
+ +#define pgd_quicklist (current_cpu_data.pgd_quick) /* the three list cursors and the counter below all live in current_cpu_data */
+ +#define pmd_quicklist (current_cpu_data.pmd_quick)
+ +#define pte_quicklist (current_cpu_data.pte_quick)
+ +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+ +/*
+ + * pmd_populate(mm, pmd, pte): store a pmd entry built from
+ + * _PAGE_TABLE + __pa(pte) through set_pmd(), then call
+ + * XEN_flush_page_update_queue(). The 'mm' argument is not used.
+ + */
+ +#define pmd_populate(mm, pmd, pte)              \
+ + do {                                             \
+ +  set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)));   \
+ +  XEN_flush_page_update_queue();                 \
+ + } while ( 0 )
+ +
+ +/*
+ + * Allocate and free page tables.
+ + */
+ +
+ +#if defined (CONFIG_X86_PAE)
+ +
+ +#error "no PAE support as yet"
+ +
+ +/*
+ + * We can't include <linux/slab.h> here, thus these uglinesses.
+ + */
+ +struct kmem_cache_s;
+ +
+ +extern struct kmem_cache_s *pae_pgd_cachep;
+ +extern void *kmem_cache_alloc(struct kmem_cache_s *, int);
+ +extern void kmem_cache_free(struct kmem_cache_s *, void *);
+ +
+ +/*
+ + * PAE variant (this branch is guarded by the #error above, so it is not
+ + * currently buildable). Allocates a pgd from pae_pgd_cachep, gives each of
+ + * the first USER_PTRS_PER_PGD entries its own freshly cleared page, and
+ + * copies the remaining entries from init_mm.pgd. Returns the pgd, or NULL
+ + * if a per-entry page allocation fails (pages allocated so far and the pgd
+ + * itself are released first). If the initial kmem_cache_alloc() yields a
+ + * null pointer, that null pointer is returned as-is.
+ + */
+ +static inline pgd_t *get_pgd_slow(void)
+ +{
+ +      int i;
+ +      pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL);
+ +
+ +      if (pgd) {
+ +              for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+ +                      unsigned long pmd = __get_free_page(GFP_KERNEL);
+ +                      if (!pmd)
+ +                              goto out_oom;
+ +                      clear_page(pmd); /* NOTE(review): pmd is an unsigned long here, not a pointer -- confirm clear_page() accepts that */
+ +                      set_pgd(pgd + i, __pgd(1 + __pa(pmd))); /* the 1 added here is subtracted again on the free paths */
+ +              }
+ +              memcpy(pgd + USER_PTRS_PER_PGD,
+ +                      init_mm.pgd + USER_PTRS_PER_PGD,
+ +                      (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+ +      }
+ +      return pgd;
+ +out_oom:
+ +      for (i--; i >= 0; i--) /* entry i itself failed, so unwind from i-1 down to 0 */
+ +              free_page((unsigned long)__va(pgd_val(pgd[i])-1));
+ +      kmem_cache_free(pae_pgd_cachep, pgd);
+ +      return NULL;
+ +}
+ +
+ +#else
+ +/*
+ + * Non-PAE variant. Takes one free page for the pgd, zeroes its first
+ + * USER_PTRS_PER_PGD entries and copies the remaining entries from
+ + * init_mm.pgd. The page is then passed to __make_page_readonly() and its
+ + * physical address to queue_pgd_pin(). If the allocation returned a null
+ + * pointer, all of that is skipped and the null pointer is returned.
+ + */
+ +static inline pgd_t *get_pgd_slow(void)
+ +{
+ +      pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+ +
+ +      if (pgd) {
+ +              memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+ +              memcpy(pgd + USER_PTRS_PER_PGD,
+ +                      init_mm.pgd + USER_PTRS_PER_PGD,
+ +                      (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+ +                __make_page_readonly(pgd); /* done after the page has been filled in and before the pin is queued */
+ +              queue_pgd_pin(__pa(pgd));
+ +      }
+ +      return pgd;
+ +}
+ +
+ +#endif /* CONFIG_X86_PAE */
+ +/*
+ + * Return a pgd: if the pgd quicklist is not empty, take the entry just
+ + * below the cursor (pre-decrementing pgd_quicklist) and decrement
+ + * pgtable_cache_size; otherwise fall back to get_pgd_slow().
+ + */
+ +static inline pgd_t *get_pgd_fast(void)
+ +{
+ +      unsigned long ret;
+ +
+ +      if ( !QUICKLIST_EMPTY(pgd_quicklist) ) {
+ +              ret = *(--pgd_quicklist);
+ +              pgtable_cache_size--;
+ +
+ +      } else
+ +              ret = (unsigned long)get_pgd_slow();
+ +      return (pgd_t *)ret;
+ +}
+ +/*
+ + * Release a pgd without going through the quicklist. In the non-PAE branch
+ + * the order is: queue_pgd_unpin() on the physical address, then
+ + * __make_page_writable() on the page, then free_page(). The PAE branch is
+ + * disabled by a bare #error.
+ + */
+ +static inline void free_pgd_slow(pgd_t *pgd)
+ +{
+ +#if defined(CONFIG_X86_PAE)
+ +#error
+ +      int i;
+ +
+ +      for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ +              free_page((unsigned long)__va(pgd_val(pgd[i])-1));
+ +      kmem_cache_free(pae_pgd_cachep, pgd);
+ +#else
+ +      queue_pgd_unpin(__pa(pgd));
-     __make_page_writeable(pte);
++        __make_page_writable(pgd);
+ +      free_page((unsigned long)pgd);
+ +#endif
+ +}
+ +/*
+ + * Release a pgd: if the pgd quicklist is not full, store the pointer at the
+ + * cursor (post-incrementing pgd_quicklist) and increment
+ + * pgtable_cache_size; otherwise hand it to free_pgd_slow().
+ + */
+ +static inline void free_pgd_fast(pgd_t *pgd)
+ +{
+ +        if ( !QUICKLIST_FULL(pgd_quicklist) ) {
+ +                *(pgd_quicklist++) = (unsigned long)pgd;
+ +                pgtable_cache_size++;
+ +        } else
+ +                free_pgd_slow(pgd);
+ +}
+ +/*
+ + * Allocate one page for a pte table. On success the page is cleared, passed
+ + * to __make_page_readonly(), and its physical address is passed to
+ + * queue_pte_pin(). Returns the page pointer, or the null pointer from the
+ + * failed allocation. 'mm' and 'address' are not used.
+ + */
+ +static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+ +{
+ +    pte_t *pte;
+ +
+ +    pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ +    if (pte)
+ +    {
+ +        clear_page(pte);
+ +        __make_page_readonly(pte);
+ +        queue_pte_pin(__pa(pte));
+ +    }
+ +    return pte;
+ +
+ +}
+ +/*
+ + * Take a pte page from the pte quicklist if it is not empty (decrementing
+ + * pgtable_cache_size). Unlike get_pgd_fast() there is no fallback here:
+ + * when the list is empty the result is a null pointer. 'mm' and 'address'
+ + * are not used.
+ + */
+ +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
+ +                                      unsigned long address)
+ +{
+ +    unsigned long ret = 0;
+ +    if ( !QUICKLIST_EMPTY(pte_quicklist) ) {
+ +        ret = *(--pte_quicklist);
+ +        pgtable_cache_size--;
+ +    }
+ +    return (pte_t *)ret;
+ +}
+ +/*
+ + * Release a pte page without going through the quicklist, in this order:
+ + * queue_pte_unpin() on its physical address, __make_page_writable() on the
+ + * page, then free_page().
+ + */
+ +static __inline__ void pte_free_slow(pte_t *pte)
+ +{
+ +    queue_pte_unpin(__pa(pte));
++    __make_page_writable(pte);
+ +    free_page((unsigned long)pte);
+ +}
+ +/*
+ + * Release a pte page: if the pte quicklist is not full, store the pointer
+ + * at the cursor and increment pgtable_cache_size; otherwise hand it to
+ + * pte_free_slow().
+ + */
+ +static inline void pte_free_fast(pte_t *pte)
+ +{
+ +    if ( !QUICKLIST_FULL(pte_quicklist) ) {
+ +        *(pte_quicklist++) = (unsigned long)pte;
+ +        pgtable_cache_size++;
+ +    } else
+ +        pte_free_slow(pte);
+ +}
+ +/* The generic alloc/free names map onto the quicklist ("fast") variants; pgd_alloc ignores its 'mm' argument. */
+ +#define pte_free(pte)         pte_free_fast(pte)
+ +#define pgd_free(pgd)         free_pgd_fast(pgd)
+ +#define pgd_alloc(mm)         get_pgd_fast()
+ +
+ +/*
+ + * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ + * inside the pgd, so has no extra memory associated with it.
+ + * (In the PAE case we free the pmds as part of the pgd.)
+ + */
+ +
+ +#define pmd_alloc_one_fast(mm, addr)  ({ BUG(); ((pmd_t *)1); }) /* invokes BUG() first; the expression's value is the dummy non-null pointer 1 */
+ +#define pmd_alloc_one(mm, addr)               ({ BUG(); ((pmd_t *)2); }) /* same, with dummy value 2 */
+ +#define pmd_free_slow(x)              do { } while (0) /* the three pmd_free* macros expand to empty statements */
+ +#define pmd_free_fast(x)              do { } while (0)
+ +#define pmd_free(x)                   do { } while (0)
+ +#define pgd_populate(mm, pmd, pte)    BUG() /* arguments are discarded */
+ +
+ +extern int do_check_pgt_cache(int, int);
+ +
+ +/*
+ + * TLB flushing:
+ + *
+ + *  - flush_tlb() flushes the current mm struct TLBs
+ + *  - flush_tlb_all() flushes all processes TLBs
+ + *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ + *  - flush_tlb_page(vma, vmaddr) flushes one page
+ + *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ + *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ + *
+ + * ..but the i386 has somewhat limited tlb flushing capabilities,
+ + * and page-granular flushes are available only on i486 and up.
+ + */
+ +
+ +#ifndef CONFIG_SMP
+ +
+ +#define flush_tlb() __flush_tlb()
+ +#define flush_tlb_all() __flush_tlb_all()
+ +#define local_flush_tlb() __flush_tlb()
+ +/*
+ + * Uniprocessor flush_tlb_mm(): calls queue_tlb_flush() only when 'mm' is
+ + * the current task's active_mm, and calls XEN_flush_page_update_queue()
+ + * unconditionally.
+ + */
+ +static inline void flush_tlb_mm(struct mm_struct *mm)
+ +{
+ +      if (mm == current->active_mm) queue_tlb_flush();
+ +      XEN_flush_page_update_queue();
+ +}
+ +/*
+ + * Uniprocessor flush_tlb_page(): calls queue_invlpg(addr) only when the
+ + * vma's mm is the current task's active_mm, and calls
+ + * XEN_flush_page_update_queue() unconditionally.
+ + */
+ +static inline void flush_tlb_page(struct vm_area_struct *vma,
+ +      unsigned long addr)
+ +{
+ +      if (vma->vm_mm == current->active_mm) queue_invlpg(addr);
+ +      XEN_flush_page_update_queue();
+ +}
+ +/*
+ + * Uniprocessor flush_tlb_range(): the body is the same as flush_tlb_mm();
+ + * 'start' and 'end' are not used, so the request is not narrowed to the
+ + * given range.
+ + */
+ +static inline void flush_tlb_range(struct mm_struct *mm,
+ +      unsigned long start, unsigned long end)
+ +{
+ +      if (mm == current->active_mm) queue_tlb_flush();
+ +      XEN_flush_page_update_queue();
+ +}
+ +
+ +#else
+ +#error no guestos SMP support yet...
+ +#include <asm/smp.h>
+ +
+ +#define local_flush_tlb() \
+ +      __flush_tlb()
+ +
+ +extern void flush_tlb_all(void);
+ +extern void flush_tlb_current_task(void);
+ +extern void flush_tlb_mm(struct mm_struct *);
+ +extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ +
+ +#define flush_tlb()   flush_tlb_current_task()
+ +/* SMP variant (this branch is guarded by the #error above): forwards to flush_tlb_mm(mm); 'start' and 'end' are not used. */
+ +static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
+ +{
+ +      flush_tlb_mm(mm);
+ +}
+ +
+ +#define TLBSTATE_OK   1
+ +#define TLBSTATE_LAZY 2
+ +/*
+ + * One record per CPU slot (cpu_tlbstate has NR_CPUS elements), declared
+ + * with ____cacheline_aligned. 'state' presumably holds TLBSTATE_OK or
+ + * TLBSTATE_LAZY -- confirm against the SMP code that uses it.
+ + */
+ +struct tlb_state
+ +{
+ +      struct mm_struct *active_mm;
+ +      int state;
+ +} ____cacheline_aligned;
+ +extern struct tlb_state cpu_tlbstate[NR_CPUS];
+ +
+ +#endif /* CONFIG_SMP */
+ +/*
+ + * flush_tlb_pgtables(): only calls XEN_flush_page_update_queue(); 'mm',
+ + * 'start' and 'end' are not used.
+ + */
+ +static inline void flush_tlb_pgtables(struct mm_struct *mm,
+ +                                    unsigned long start, unsigned long end)
+ +{
+ +    /* i386 does not keep any page table caches in TLB */
+ +    XEN_flush_page_update_queue();
+ +}
+ +
+ +/*
+ + * NB. The 'domid' field should be zero if mapping I/O space (non RAM).
+ + * Otherwise it identifies the owner of the memory that is being mapped.
+ + */
+ +extern int direct_remap_area_pages(struct mm_struct *mm,
+ +                                   unsigned long address, 
+ +                                   unsigned long machine_addr,
+ +                                   unsigned long size, 
+ +                                   pgprot_t prot,
+ +                                   domid_t  domid);
+ +
+ +extern int __direct_remap_area_pages(struct mm_struct *mm,
+ +                                   unsigned long address, 
+ +                                   unsigned long size, 
+ +                                   mmu_update_t *v);
+ +
+ +
+ +
+ +#endif /* _I386_PGALLOC_H */
diff --cc linux-2.4.27-xen-sparse/include/asm-xen/pgtable.h

index dc25864d2cda858f0c2eaaefde5e7a325f1496fa,0000000000000000000000000000000000000000..d3ece3a2a54a64cf6cde6fa1d9c9e1128584793f

mode 100644,000000..100644
--- 1/linux-2.4.27-xen-sparse/include/asm-xen/pgtable.h
--- /dev/null
+++ b/linux-2.4.27-xen-sparse/include/asm-xen/pgtable.h
@@@ -1,370 -1,0 +1,370 @@@
- static inline void __make_page_writeable(void *va)
+ +#ifndef _I386_PGTABLE_H
+ +#define _I386_PGTABLE_H
+ +
+ +#include <linux/config.h>
+ +
+ +/*
+ + * The Linux memory management assumes a three-level page table setup. On
+ + * the i386, we use that, but "fold" the mid level into the top-level page
+ + * table, so that we physically have the same two-level page table as the
+ + * i386 mmu expects.
+ + *
+ + * This file contains the functions and defines necessary to modify and use
+ + * the i386 page table tree.
+ + */
+ +#ifndef __ASSEMBLY__
+ +#include <asm/processor.h>
+ +#include <asm/hypervisor.h>
+ +#include <linux/threads.h>
+ +#include <asm/fixmap.h>
+ +
+ +#ifndef _I386_BITOPS_H
+ +#include <asm/bitops.h>
+ +#endif
+ +
+ +#define swapper_pg_dir 0
+ +extern void paging_init(void);
+ +
+ +/* Caches aren't brain-dead on the intel. */
+ +#define flush_cache_all()                     do { } while (0)
+ +#define flush_cache_mm(mm)                    do { } while (0)
+ +#define flush_cache_range(mm, start, end)     do { } while (0)
+ +#define flush_cache_page(vma, vmaddr)         do { } while (0)
+ +#define flush_page_to_ram(page)                       do { } while (0)
+ +#define flush_dcache_page(page)                       do { } while (0)
+ +#define flush_icache_range(start, end)                do { } while (0)
+ +#define flush_icache_page(vma,pg)             do { } while (0)
+ +#define flush_icache_user_range(vma,pg,adr,len)       do { } while (0)
+ +
+ +extern unsigned long pgkern_mask;
+ +/*
+ + * TLB flush primitives. Each one queues a request (queue_tlb_flush() or
+ + * queue_invlpg(addr)) and then calls XEN_flush_page_update_queue().
+ + * __flush_tlb_global and __flush_tlb_all are plain aliases of __flush_tlb,
+ + * and __flush_tlb_one / __flush_tlb_single have identical expansions.
+ + */
+ +#define __flush_tlb() ({ queue_tlb_flush(); XEN_flush_page_update_queue(); })
+ +#define __flush_tlb_global() __flush_tlb()
+ +#define __flush_tlb_all() __flush_tlb_global()
+ +#define __flush_tlb_one(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); })
+ +#define __flush_tlb_single(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); })
+ +
+ +/*
+ + * ZERO_PAGE is a global shared page that is always zero: used
+ + * for zero-mapped memory areas etc..
+ + */
+ +extern unsigned long empty_zero_page[1024];
+ +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+ +
+ +#endif /* !__ASSEMBLY__ */
+ +
+ +/*
+ + * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ + * implements both the traditional 2-level x86 page tables and the
+ + * newer 3-level PAE-mode page tables.
+ + */
+ +#ifndef __ASSEMBLY__
+ +#if CONFIG_X86_PAE
+ +# include <asm/pgtable-3level.h>
+ +
+ +/*
+ + * Need to initialise the X86 PAE caches
+ + */
+ +extern void pgtable_cache_init(void);
+ +
+ +#else
+ +# include <asm/pgtable-2level.h>
+ +
+ +/*
+ + * No page table caches to initialise
+ + */
+ +#define pgtable_cache_init()  do { } while (0)
+ +
+ +#endif
+ +#endif
+ +
+ +#define PMD_SIZE      (1UL << PMD_SHIFT)
+ +#define PMD_MASK      (~(PMD_SIZE-1))
+ +#define PGDIR_SIZE    (1UL << PGDIR_SHIFT)
+ +#define PGDIR_MASK    (~(PGDIR_SIZE-1))
+ +
+ +#define USER_PTRS_PER_PGD     (TASK_SIZE/PGDIR_SIZE)
+ +#define FIRST_USER_PGD_NR     0
+ +
+ +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+ +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+ +
+ +#define TWOLEVEL_PGDIR_SHIFT  22
+ +#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+ +#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+ +
+ +
+ +#ifndef __ASSEMBLY__
+ +/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */
+ +#define VMALLOC_OFFSET        (4*1024*1024)
+ +extern void * high_memory;
+ +#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
+ +                                              ~(VMALLOC_OFFSET-1))
+ +#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+ +#if CONFIG_HIGHMEM
+ +# define VMALLOC_END  (PKMAP_BASE-2*PAGE_SIZE)
+ +#else
+ +# define VMALLOC_END  (FIXADDR_START-2*PAGE_SIZE)
+ +#endif
+ +
+ +#define _PAGE_BIT_PRESENT     0
+ +#define _PAGE_BIT_RW          1
+ +#define _PAGE_BIT_USER                2
+ +#define _PAGE_BIT_PWT         3
+ +#define _PAGE_BIT_PCD         4
+ +#define _PAGE_BIT_ACCESSED    5
+ +#define _PAGE_BIT_DIRTY               6
+ +#define _PAGE_BIT_PSE         7       /* 4 MB (or 2MB) page, Pentium+, if present.. */
+ +#define _PAGE_BIT_GLOBAL      8       /* Global TLB entry PPro+ */
+ +
+ +#define _PAGE_PRESENT 0x001
+ +#define _PAGE_RW      0x002
+ +#define _PAGE_USER    0x004
+ +#define _PAGE_PWT     0x008
+ +#define _PAGE_PCD     0x010
+ +#define _PAGE_ACCESSED        0x020
+ +#define _PAGE_DIRTY   0x040
+ +#define _PAGE_PSE     0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
+ +#define _PAGE_GLOBAL  0x100   /* Global TLB entry PPro+ */
+ +
+ +#define _PAGE_PROTNONE        0x080   /* If not present */
+ +
+ +#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+ +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+ +#define _PAGE_CHG_MASK        (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+ +
+ +#define PAGE_NONE     __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+ +#define PAGE_SHARED   __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+ +#define PAGE_COPY     __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+ +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+ +
+ +#define __PAGE_KERNEL \
+ +      (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+ +#define __PAGE_KERNEL_NOCACHE \
+ +      (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+ +#define __PAGE_KERNEL_RO \
+ +      (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+ +
+ +#if 0
+ +#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
+ +#else
+ +#define MAKE_GLOBAL(x) __pgprot(x)
+ +#endif
+ +
+ +#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+ +#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+ +#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+ +
+ +/*
+ + * The i386 can't do page protection for execute, and considers that
+ + * the same as read. Also, write permissions imply read permissions.
+ + * This is the closest we can get..
+ + */
+ +#define __P000        PAGE_NONE
+ +#define __P001        PAGE_READONLY
+ +#define __P010        PAGE_COPY
+ +#define __P011        PAGE_COPY
+ +#define __P100        PAGE_READONLY
+ +#define __P101        PAGE_READONLY
+ +#define __P110        PAGE_COPY
+ +#define __P111        PAGE_COPY
+ +
+ +#define __S000        PAGE_NONE
+ +#define __S001        PAGE_READONLY
+ +#define __S010        PAGE_SHARED
+ +#define __S011        PAGE_SHARED
+ +#define __S100        PAGE_READONLY
+ +#define __S101        PAGE_READONLY
+ +#define __S110        PAGE_SHARED
+ +#define __S111        PAGE_SHARED
+ +/*
+ + * Presence tests and clears for pte/pmd entries. pte_clear() does not
+ + * store to the entry directly: it passes the pointer and the value 0 to
+ + * queue_l1_entry_update(). pmd_clear() goes through set_pmd().
+ + */
+ +#define pte_present(x)        ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) /* non-zero if either bit is set */
+ +#define pte_clear(xp) queue_l1_entry_update(xp, 0)
+ +
+ +#define pmd_none(x)   (!(x).pmd)
+ +#define pmd_present(x)        ((x).pmd & _PAGE_PRESENT)
+ +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+ +#define       pmd_bad(x)      (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) /* masks with ~PAGE_MASK (presumably the flag bits -- confirm), ignores _PAGE_USER, and requires the rest to equal _KERNPG_TABLE */
+ +
+ +
+ +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+ +
+ +/*
+ + * The following only work if pte_present() is true.
+ + * Undefined behaviour if not..
+ + */
+ +static inline int pte_read(pte_t pte)         { return (pte).pte_low & _PAGE_USER; } /* read and exec are both represented by _PAGE_USER */
+ +static inline int pte_exec(pte_t pte)         { return (pte).pte_low & _PAGE_USER; }
+ +static inline int pte_dirty(pte_t pte)                { return (pte).pte_low & _PAGE_DIRTY; }
+ +static inline int pte_young(pte_t pte)                { return (pte).pte_low & _PAGE_ACCESSED; }
+ +static inline int pte_write(pte_t pte)                { return (pte).pte_low & _PAGE_RW; }
+ +/* The helpers below take the pte by value and return the modified copy; they do not write to any page table. */
+ +static inline pte_t pte_rdprotect(pte_t pte)  { (pte).pte_low &= ~_PAGE_USER; return pte; }
+ +static inline pte_t pte_exprotect(pte_t pte)  { (pte).pte_low &= ~_PAGE_USER; return pte; }
+ +static inline pte_t pte_mkclean(pte_t pte)    { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
+ +static inline pte_t pte_mkold(pte_t pte)      { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
+ +static inline pte_t pte_wrprotect(pte_t pte)  { (pte).pte_low &= ~_PAGE_RW; return pte; }
+ +static inline pte_t pte_mkread(pte_t pte)     { (pte).pte_low |= _PAGE_USER; return pte; }
+ +static inline pte_t pte_mkexec(pte_t pte)     { (pte).pte_low |= _PAGE_USER; return pte; }
+ +static inline pte_t pte_mkdirty(pte_t pte)    { (pte).pte_low |= _PAGE_DIRTY; return pte; }
+ +static inline pte_t pte_mkyoung(pte_t pte)    { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
+ +static inline pte_t pte_mkwrite(pte_t pte)    { (pte).pte_low |= _PAGE_RW; return pte; }
+ +/*
+ + * Read the entry at 'ptep' and return its _PAGE_DIRTY bit (non-zero if
+ + * set). When the bit is set, an update with the bit cleared is passed to
+ + * queue_l1_entry_update(); the entry is not stored to directly here.
+ + * NOTE(review): the read and the queued update are separate steps --
+ + * confirm that callers do not rely on this being atomic.
+ + */
+ +static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+ +{
+ +    unsigned long pteval = *(unsigned long *)ptep;
+ +    int ret = pteval & _PAGE_DIRTY;
+ +    if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_DIRTY);
+ +    return ret;
+ +}
+ +static inline  int ptep_test_and_clear_young(pte_t *ptep)
+ +{ /* Same shape as ptep_test_and_clear_dirty(), for _PAGE_ACCESSED: return the bit, and queue an update clearing it only if it was set. */
+ +    unsigned long pteval = *(unsigned long *)ptep;
+ +    int ret = pteval & _PAGE_ACCESSED;
+ +    if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_ACCESSED);
+ +    return ret;
+ +}
+ +static inline void ptep_set_wrprotect(pte_t *ptep)
+ +{ /* Queue an update clearing _PAGE_RW, but only if the entry currently has it set. */
+ +    unsigned long pteval = *(unsigned long *)ptep;
+ +    if ( (pteval & _PAGE_RW) )
+ +        queue_l1_entry_update(ptep, pteval & ~_PAGE_RW);
+ +}
+ +static inline void ptep_mkdirty(pte_t *ptep)
+ +{ /* Queue an update setting _PAGE_DIRTY, but only if the entry does not already have it set. */
+ +    unsigned long pteval = *(unsigned long *)ptep;
+ +    if ( !(pteval & _PAGE_DIRTY) )
+ +        queue_l1_entry_update(ptep, pteval | _PAGE_DIRTY);
+ +}
+ +
+ +/*
+ + * Conversion functions: convert a page and protection to a page entry,
+ + * and a page entry and page directory to the page they refer to.
+ + */
+ +
+ +#define mk_pte(page, pgprot)  __mk_pte((page) - mem_map, (pgprot))
+ +
+ +/* This takes a physical page address that is used by the remapping functions */
+ +#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+ +/*
+ + * Return a copy of 'pte' with new protection bits: the bits selected by
+ + * _PAGE_CHG_MASK (PTE_MASK plus the accessed and dirty bits) are kept from
+ + * the old value, and the bits of 'newprot' are OR-ed in. Operates on the
+ + * by-value copy only.
+ + */
+ +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ +{
+ +      pte.pte_low &= _PAGE_CHG_MASK;
+ +      pte.pte_low |= pgprot_val(newprot);
+ +      return pte;
+ +}
+ +
+ +#define page_pte(page) page_pte_prot(page, __pgprot(0))
+ +
+ +#define pmd_page(pmd) \
+ +((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+ +
+ +/* to find an entry in a page-table-directory. */
+ +#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) /* NOTE(review): 'address' is not parenthesized in the expansion, unlike __pmd_offset below */
+ +
+ +#define __pgd_offset(address) pgd_index(address)
+ +
+ +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+ +
+ +/* to find an entry in a kernel page-table-directory */
+ +#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+ +
+ +#define __pmd_offset(address) \
+ +              (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+ +
+ +/* Find an entry in the third-level page table.. */
+ +#define __pte_offset(address) \
+ +              ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* NOTE(review): 'address' is not parenthesized here either */
+ +#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+ +                      __pte_offset(address))
+ +
+ +/*
+ + * The i386 doesn't have any external MMU info: the kernel page
+ + * tables contain all the necessary information.
+ + */
+ +#define update_mmu_cache(vma,address,pte) do { } while (0)
+ +
+ +/* Encode and de-code a swap entry */
+ +#define SWP_TYPE(x)                   (((x).val >> 1) & 0x3f) /* six-bit field starting at bit 1 */
+ +#define SWP_OFFSET(x)                 ((x).val >> 8) /* everything from bit 8 upward */
+ +#define SWP_ENTRY(type, offset)               ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) /* leaves bit 0 (_PAGE_PRESENT) and bit 7 (_PAGE_PROTNONE) clear as long as 'type' fits in six bits */
+ +#define pte_to_swp_entry(pte)         ((swp_entry_t) { (pte).pte_low })
+ +#define swp_entry_to_pte(x)           ((pte_t) { (x).val })
+ +
+ +struct page;
+ +int change_page_attr(struct page *, int, pgprot_t prot);
+ +/*
+ + * Walk the kernel page tables (pgd_offset_k -> pmd_offset -> pte_offset)
+ + * to the pte that maps 'va', and queue an update of that pte with
+ + * _PAGE_RW cleared. The walk does not check that the intermediate entries
+ + * are present.
+ + */
+ +static inline void __make_page_readonly(void *va)
+ +{
+ +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+ +    queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ +}
+ +/*
+ + * Inverse of __make_page_readonly(): same kernel page-table walk to the
+ + * pte that maps 'va', then queue an update of that pte with _PAGE_RW set.
+ + */
- static inline void make_page_writeable(void *va)
++static inline void __make_page_writable(void *va)
+ +{
+ +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+ +    queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ +}
+ +/*
+ + * Queue an update clearing _PAGE_RW in the kernel pte that maps 'va'. If
+ + * 'va' is at or above VMALLOC_START, additionally take the frame address
+ + * out of that pte, convert it with machine_to_virt(), and apply
+ + * __make_page_readonly() to the resulting address as well (presumably the
+ + * page's other kernel mapping -- confirm).
+ + */
+ +static inline void make_page_readonly(void *va)
+ +{
+ +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+ +    queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ +    if ( (unsigned long)va >= VMALLOC_START )
+ +        __make_page_readonly(machine_to_virt(
+ +            *(unsigned long *)pte&PAGE_MASK)); /* the pte is read again here, after the update above was queued; the mask drops the bits outside PAGE_MASK */
+ +}
+ +/*
+ + * Inverse of make_page_readonly(): queue an update setting _PAGE_RW in the
+ + * kernel pte that maps 'va'. If 'va' is at or above VMALLOC_START,
+ + * additionally apply __make_page_writable() to the address obtained by
+ + * passing the pte's frame address through machine_to_virt().
+ + */
-         __make_page_writeable(machine_to_virt(
++static inline void make_page_writable(void *va)
+ +{
+ +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+ +    queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ +    if ( (unsigned long)va >= VMALLOC_START )
- static inline void make_pages_writeable(void *va, unsigned int nr)
++        __make_page_writable(machine_to_virt(
+ +            *(unsigned long *)pte&PAGE_MASK));
+ +}
+ +/*
+ + * Apply make_page_readonly() to 'nr' consecutive pages starting at 'va',
+ + * stepping by PAGE_SIZE. Does nothing when 'nr' is 0.
+ + */
+ +static inline void make_pages_readonly(void *va, unsigned int nr)
+ +{
+ +    while ( nr-- != 0 )
+ +    {
+ +        make_page_readonly(va);
+ +        va = (void *)((unsigned long)va + PAGE_SIZE);
+ +    }
+ +}
+ +/*
+ + * Apply make_page_writable() to 'nr' consecutive pages starting at 'va',
+ + * stepping by PAGE_SIZE. Does nothing when 'nr' is 0.
+ + */
-         make_page_writeable(va);
++static inline void make_pages_writable(void *va, unsigned int nr)
+ +{
+ +    while ( nr-- != 0 )
+ +    {
++        make_page_writable(va);
+ +        va = (void *)((unsigned long)va + PAGE_SIZE);
+ +    }
+ +}
+ +/*
+ + * Translate a kernel virtual address by walking the kernel page tables:
+ + * take the PAGE_MASK part of the pte that maps 'va' and combine it with
+ + * the offset of 'va' within its page. The walk does not check that the
+ + * entries are present. NOTE(review): the value comes straight from the
+ + * pte, so under Xen it is presumably a machine address despite the
+ + * function's name -- confirm.
+ + */
+ +static inline unsigned long arbitrary_virt_to_phys(void *va)
+ +{
+ +    pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ +    pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ +    pte_t *pte = pte_offset(pmd, (unsigned long)va);
+ +    unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
+ +    return pa | ((unsigned long)va & (PAGE_SIZE-1));
+ +}
+ +
+ +#endif /* !__ASSEMBLY__ */
+ +
+ +/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+ +#define PageSkip(page)                (0)
+ +#define kern_addr_valid(addr) (1)
+ +
+ +#define io_remap_page_range remap_page_range
+ +
+ +#endif /* _I386_PGTABLE_H */
diff --cc linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h
Simple merge
author	kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
	Tue, 24 Aug 2004 09:49:05 +0000 (09:49 +0000)
committer	kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
	Tue, 24 Aug 2004 09:49:05 +0000 (09:49 +0000)
		1	2
linux-2.4.27-xen-sparse/arch/xen/kernel/ldt.c	patch \|	diff1 \|	\|	blob \| history
linux-2.4.27-xen-sparse/arch/xen/kernel/traps.c	patch \|	diff1 \|	\|	blob \| history
linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h	patch \|	diff1 \|	\|	blob \| history
linux-2.4.27-xen-sparse/include/asm-xen/pgtable.h	patch \|	diff1 \|	\|	blob \| history
linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history